This notebook calculates vegetation phenology changes using Sentinel-2 data. To detect changes, the algorithm uses the Normalized Difference Vegetation Index (NDVI), which is a common proxy for vegetation growth and health. The outputs of this notebook can be used to assess differences in agricultural fields over time or space, and also allow the assessment of growing stages such as planting and harvesting.
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import datacube
import sys, os
os.environ['USE_PYGEOS'] = '0'
from dea_tools.plotting import display_map
### EASI tools
sys.path.append(os.path.expanduser('../scripts'))
from ceos_utils.data_cube_utilities.clean_mask import landsat_clean_mask_invalid, landsat_qa_clean_mask
from easi_tools import EasiDefaults
from easi_tools import notebook_utils
# Load deployment-specific defaults (area of interest, hub URL, etc.)
# supplied by the EASI environment this notebook runs on.
easi = EasiDefaults() # Get the default parameters for this system
Successfully found configuration for deployment "eail"
# Start (or connect to) a Dask cluster for lazy, parallel computation.
# use_gateway=False means a local cluster is created in this session.
cluster, client = notebook_utils.initialize_dask(use_gateway=False)
# Render the cluster widget; fall back to the client repr if no cluster object.
display(cluster if cluster else client)
# Print a dashboard URL reachable through the JupyterHub proxy.
print(notebook_utils.localcluster_dashboard(client, server=easi.hub))
fd25c130
| Dashboard: http://127.0.0.1:8787/status | Workers: 4 |
| Total threads: 8 | Total memory: 29.00 GiB |
| Status: running | Using processes: True |
Scheduler-43e63244-a1f8-4d6e-ae5b-a5726f5ef3df
| Comm: tcp://127.0.0.1:41341 | Workers: 4 |
| Dashboard: http://127.0.0.1:8787/status | Total threads: 8 |
| Started: Just now | Total memory: 29.00 GiB |
| Comm: tcp://127.0.0.1:42799 | Total threads: 2 |
| Dashboard: http://127.0.0.1:36831/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:38309 | |
| Local directory: /tmp/dask-worker-space/worker-dru4dx4n | |
| Comm: tcp://127.0.0.1:37255 | Total threads: 2 |
| Dashboard: http://127.0.0.1:35541/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:36009 | |
| Local directory: /tmp/dask-worker-space/worker-y85hl2ex | |
| Comm: tcp://127.0.0.1:40441 | Total threads: 2 |
| Dashboard: http://127.0.0.1:43233/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:41401 | |
| Local directory: /tmp/dask-worker-space/worker-ne_5nczt | |
| Comm: tcp://127.0.0.1:42629 | Total threads: 2 |
| Dashboard: http://127.0.0.1:37859/status | Memory: 7.25 GiB |
| Nanny: tcp://127.0.0.1:42079 | |
| Local directory: /tmp/dask-worker-space/worker-tmdzbu6t | |
https://hub.eail.easi-eo.solutions/user/jhodge/proxy/8787/status
# Configure signed, requester-pays S3 access for the datacube loads below,
# and propagate the credentials to the Dask workers via `client`.
from datacube.utils.aws import configure_s3_access
configure_s3_access(aws_unsigned=False, requester_pays=True, client=client)
<botocore.credentials.Credentials at 0x7f5183480400>
# Select a Product and Platform
# s2_l2a = Sentinel-2 Level-2A (atmospherically corrected surface reflectance).
product = "s2_l2a"
platform = "Sentinel-2A"
# NEW Yield Data from Vietnam (18-Nov-2022)
# Candidate field locations (field ID, district, yield class). Uncomment one
# lat_long line plus the box_size_deg / latitude / longitude lines below to
# analyse a specific field instead of the deployment default extent.
# lat_long = (10.443492, 105.281103) # 17, Chau Thanh, High Yield
# lat_long = (10.4172, 105.3635) # 28, Chau Thanh, Low Yield
# lat_long = (10.454342, 105.322838) #6, Chau Thanh, High Yield
# lat_long = (10.434116, 105.273150) #13, Chau Thanh, Low Yield
# lat_long = (10.392899, 105.188514) #37, Chau Thanh, High Yield
# lat_long = (10.394341, 105.126836) #47, Chau Thanh, Low Yield
# lat_long = (10.356519, 105.309450) #146, Chau Thanh, High Yield
# lat_long = (10.354744, 105.336739) #142, Chau Thanh, Low Yield
# box_size_deg = 0.0004 # Typically yields 5x5 pixel region
# Calculate the latitude and longitude bounds of the analysis box
# latitude = (lat_long[0]-box_size_deg/2, lat_long[0]+box_size_deg/2)
# longitude = (lat_long[1]-box_size_deg/2, lat_long[1]+box_size_deg/2)
# Use the deployment's default area of interest (from EasiDefaults above).
latitude = easi.latitude
longitude = easi.longitude
# Define Time Range
# The format of the time date is YYYY-MM-DD
start_date = '2022-04-01'
end_date = '2022-09-01'
time_extents = (start_date,end_date)
# The code below renders a map that can be used to view the region.
display_map(longitude,latitude)
/env/lib/python3.10/site-packages/dea_tools/plotting.py:313: FutureWarning: This function is deprecated. See: https://pyproj4.github.io/pyproj/stable/gotchas.html#upgrading-to-pyproj-2-from-pyproj-1
all_longitude, all_latitude = transform(Proj(crs), Proj('EPSG:4326'), all_x,
# Connect to the Open Data Cube index.
dc = datacube.Datacube()
# Lazily load the Sentinel-2 red/NIR bands plus the SCL classification band.
# group_by='solar_day' merges same-day overlapping scenes into one time slice;
# dask_chunks={'time': 1} keeps the load lazy with one chunk per time slice.
sentinel_dataset = dc.load(latitude = latitude,
                           longitude = longitude,
                           time = time_extents,
                           product = product,
                           group_by = 'solar_day',
                           measurements = ['red', 'nir', 'SCL'],
                           output_crs = 'EPSG:6933',  # equal-area CRS
                           resolution = (-10,10),     # 10 m pixels (y, x)
                           dask_chunks = {'time':1})
# Filter data using the Sentinel-2 Scene Classification Layer (SCL).
# Classes removed by the cloud mask:
#   scl=0  > No Data
#   scl=1  > Saturated
#   scl=3  > Cloud Shadows
#   scl=8  > Cloud Medium Probability
#   scl=9  > Cloud High Probability
#   scl=10 > Thin Cirrus Cloud
# isin() replaces the original chain of six != comparisons; ~ inverts it so
# True marks pixels to KEEP.
cloud_mask = ~sentinel_dataset.SCL.isin([0, 1, 3, 8, 9, 10])
# Additionally exclude water pixels (scl=6) for a land-only analysis.
land_mask = (sentinel_dataset.SCL != 6) & cloud_mask
# Drop the SCL data as it is no longer needed.
# drop_vars() is the non-deprecated replacement for Dataset.drop().
sentinel_dataset = sentinel_dataset.drop_vars('SCL')
# Apply land mask ... NO Clouds, NO Cloud Shadows and NO Water pixels
# (masked pixels become NaN).
cleaned_dataset = sentinel_dataset.where(land_mask)
def NDVI(dataset):
    """Return the Normalized Difference Vegetation Index for *dataset*.

    NDVI = (NIR - red) / (NIR + red). Values range from -1 to +1; dense
    green vegetation pushes values toward +1, while bare soil and water
    sit near or below zero.

    Expects ``dataset`` to expose ``nir`` and ``red`` reflectance arrays.
    """
    nir = dataset.nir
    red = dataset.red
    return (nir - red) / (nir + red)
# NDVI on the raw (unmasked) data.
# NOTE(review): this unmasked NDVI does not appear to be used later in the
# visible code — confirm before removing.
sentinel_dataset['NDVI'] = NDVI(sentinel_dataset)
# NDVI on the cloud/water-masked data; NaN where pixels were masked out.
cleaned_dataset['NDVI'] = NDVI(cleaned_dataset)
# Bare expression: render the dataset summary in the notebook output.
cleaned_dataset
<xarray.Dataset>
Dimensions: (time: 30, y: 1024, x: 966)
Coordinates:
* time (time) datetime64[ns] 2022-04-05T16:02:51 ... 2022-08-28T16:...
* y (y) float64 4.418e+06 4.418e+06 ... 4.408e+06 4.408e+06
* x (x) float64 -7.386e+06 -7.386e+06 ... -7.376e+06 -7.376e+06
spatial_ref int32 6933
Data variables:
red (time, y, x) float64 dask.array<chunksize=(1, 1024, 966), meta=np.ndarray>
nir (time, y, x) float64 dask.array<chunksize=(1, 1024, 966), meta=np.ndarray>
NDVI (time, y, x) float64 dask.array<chunksize=(1, 1024, 966), meta=np.ndarray>
Attributes:
crs: EPSG:6933
grid_mapping: spatial_ref# Plot the monthly time slice data in a table
# Show the acquisition timestamps of the retained time slices as a table.
import pandas as pd
pd.DataFrame({'time': cleaned_dataset.time.values})
| time | |
|---|---|
| 0 | 2022-04-05 16:02:51 |
| 1 | 2022-04-10 16:02:54 |
| 2 | 2022-04-15 16:02:48 |
| 3 | 2022-04-20 16:02:57 |
| 4 | 2022-04-25 16:02:46 |
| 5 | 2022-04-30 16:02:57 |
| 6 | 2022-05-05 16:02:48 |
| 7 | 2022-05-10 16:02:56 |
| 8 | 2022-05-15 16:02:51 |
| 9 | 2022-05-20 16:02:59 |
| 10 | 2022-05-25 16:02:52 |
| 11 | 2022-05-30 16:03:00 |
| 12 | 2022-06-04 16:02:53 |
| 13 | 2022-06-09 16:03:01 |
| 14 | 2022-06-14 16:02:56 |
| 15 | 2022-06-19 16:03:04 |
| 16 | 2022-06-24 16:02:57 |
| 17 | 2022-06-29 16:03:05 |
| 18 | 2022-07-04 16:02:57 |
| 19 | 2022-07-09 16:03:05 |
| 20 | 2022-07-14 16:02:57 |
| 21 | 2022-07-19 16:03:04 |
| 22 | 2022-07-24 16:02:57 |
| 23 | 2022-07-29 16:03:02 |
| 24 | 2022-08-03 16:02:57 |
| 25 | 2022-08-08 16:03:04 |
| 26 | 2022-08-13 16:02:55 |
| 27 | 2022-08-18 16:03:05 |
| 28 | 2022-08-23 16:02:53 |
| 29 | 2022-08-28 16:03:05 |
# Keep only time slices that contain at least one finite NDVI pixel;
# fully-masked (all-NaN) scenes would otherwise appear as gaps in the series.
nanmask = np.any(np.isfinite(cleaned_dataset.NDVI), axis=(1,2))
plt.figure(figsize=(12, 6))
# Spatial median NDVI per acquisition (median skips NaNs from the masking).
plt.plot(cleaned_dataset.time[nanmask],
         cleaned_dataset['NDVI'][nanmask].median(dim=['x','y']),
         color='red',marker='o')
# Fix: the x axis is the time coordinate, not an integer index.
plt.xlabel("Date")
plt.ylabel("NDVI")
plt.title("NDVI = Vegetation Index");
/env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject( /env/lib/python3.10/site-packages/rasterio/warp.py:344: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned. _reproject(
# Output data to CSV: one row per time slice with the spatial-median NDVI.
filename = "output.csv"
img3 = cleaned_dataset['NDVI']          # full NDVI cube (time, y, x)
img5 = img3.median(dim=['y','x'])       # collapse space -> per-date median
# to_dataframe() materialises the (dask-backed) series before writing.
img5.to_dataframe().to_csv(filename)